library(ggplot2)
library(ggpubr)
#library(grid)
#library(gridExtra)
library(ggrepel)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
FABIA.top5= readRDS(file = "data/sel_features/results_fabia_top_5_percent.rds")
FABIA.top10= readRDS(file = "data/sel_features/results_fabia_top_10_percent.rds")
FABIA.top25= readRDS(file = "data/sel_features/results_fabia_top_25_percent.rds")
FABIA.Bic = readRDS(file = "data/sel_features/results_fabia_extracted_by_bic.rds")
MOFA.top5= readRDS(file = "data/sel_features/results_mofa_top_5_percent.rds")
MOFA.top10= readRDS(file = "data/sel_features/results_mofa_top_10_percent.rds")
MOFA.top25= readRDS(file = "data/sel_features/results_mofa_top_25_percent.rds")
MOFA.Bic = readRDS(file = "data/sel_features/results_mofa_extracted_by_bic.rds")
merge.FABIA.MOFA.results = function(FABIA.results, MOFA.results, total_bcf, bc, f = bc, complete = F) {
if (bc > total_bcf) {
stop("Requested bicluster number exceeds the total available biclusters.")
}
if (!(is.na(f)) & f > total_bcf) {
stop("Requested factor number exceeds the total available factors.")
}
xbc = paste(total_bcf, "bc", sep = "")
biclusterx = paste("bicluster", bc, sep = "")
xf = paste(total_bcf, "f", sep = "")
Factorx = paste("Factor", f, sep = "")
FABIA.loadings = FABIA.results[[xbc]][[biclusterx]]
MOFA.weights = MOFA.results[[xf]][[Factorx]]
FABIA.df = as.data.frame(FABIA.loadings)
MOFA.df = as.data.frame(MOFA.weights)
FABIA.df$var = rownames(FABIA.df)
MOFA.df$var = rownames(MOFA.df)
#FABIA.df$FABIA.absL = abs(FABIA.df$FABIA.loadings) # Absolute values dropped: Calculated now by ggplot
#MOFA.df$MOFA.absW = abs(MOFA.df$MOFA.weights)
out = full_join(FABIA.df, MOFA.df, by = "var")
if (complete) {
out = out[complete.cases(out), ]
}
out
}
#fm.1.1 = full_join(FABIA.top5$`1bc`$bicluster1, MOFA.top5$`1bc`$bicluster1, by = )
# Old code to check number of shared features across FABIA and MOFA results.
# Deprecated as now FABIA & MOFA results are plotted in grids
check.FABIA.MOFA.overlap = function(FABIA.results, MOFA.results, total_bcf, against.plot = F) {
out.matrix = matrix(, nrow = total_bcf, ncol = total_bcf)
for(f in 1:total_bcf) {
for(bc in 1:total_bcf) {
curr.fm = merge.FABIA.MOFA.results(FABIA.results, MOFA.results, total_bcf, bc = bc, f = f)
overlap = sum(complete.cases(curr.fm))
#If using this function to check against plots, overlaps <= 2 are treated as no overlap
if (against.plot & overlap < 3) {
overlap = 0
}
out.matrix[f,bc] = overlap
}
}
out.matrix
}
# Extract data omics category and add color for plotting
retrieve.dataset.type = function(var.name) {
if (startsWith(var.name, "hsa")) {
out = "miRNASeqGene"
} else if (endsWith(var.name, "RPPA")) {
out = "RPPAArray"
} else if (endsWith(var.name, "R2Gn")) {
out = "RNASeq2GeneNorm"
} else {
out = NA
}
out
}
data.colors = setNames(c("red", "forestgreen", "blue"), c("miRNASeqGene", "RPPAArray", "RNASeq2GeneNorm"))
#Default data for generating a blank grid
default.data = data.frame(var = c("ph1", "ph2"),
FABIA.loadings = c(.8, 1.2),
MOFA.weights = c(.8, 1.2),
dataset = c("placeholder", "placeholder"))
plot.mf = function(plot.data) {
#plot.data = merged.mf.results[complete.cases(merged.mf.results), ] # Select complete cases only to avoid warnings, now deprecated as complete cases selection is done in FABIA-MOFA result merge.
plot.data$dataset = sapply(plot.data$var, retrieve.dataset.type)
if (nrow(plot.data) >= 3) { # Minimum of 3 entries required to plot
p = ggplot(plot.data, aes(x=FABIA.loadings, y = MOFA.weights)) +
geom_point(show.legend = F, aes(x = abs(FABIA.loadings), y = abs(MOFA.weights),color = dataset)) +
stat_cor(method = "spearman", label.y.npc="top", label.x.npc = "left") +
theme(
axis.title = element_blank()
) +
scale_color_manual(values = data.colors) #+
#xlim(min(abs(plot.data$FABIA.loadings)), max(abs(plot.data$FABIA.loadings)))
#geom_smooth(method = "lm") +
#geom_text(nudge_y = .01, alpha = 0.4)
} else { # Print blank default plot if 2 or fewer shared variables.
#print(1)
p = ggplot(data = default.data, aes(x=FABIA.loadings, y = MOFA.weights, color = dataset)) +
theme(
axis.title = element_blank()
)
}
p
}
#Plots without Spearman Correlation. Sets the range of x and y values to the min and max absolute values to allow better viewing.
plot.mf.nospear = function(plot.data) {
#plot.data = merged.mf.results[complete.cases(merged.mf.results), ] # Select complete cases only to avoid warnings, now deprecated as complete cases selection is done in FABIA-MOFA result merge.
plot.data$dataset = sapply(plot.data$var, retrieve.dataset.type)
if (nrow(plot.data) >= 3) { # Minimum of 3 entries required to plot
p = ggplot(plot.data, aes(x=FABIA.loadings, y = MOFA.weights)) +
geom_point(show.legend = F, aes(x = abs(FABIA.loadings), y = abs(MOFA.weights),color = dataset)) +
#stat_cor(method = "spearman", label.y.npc="top", label.x.npc = "left") +
theme(
axis.title = element_blank()
) +
scale_color_manual(values = data.colors) +
xlim(min(abs(plot.data$FABIA.loadings)), max(abs(plot.data$FABIA.loadings))) +
ylim(min(abs(plot.data$MOFA.weights)), max(abs(plot.data$MOFA.weights)))
#geom_smooth(method = "lm") +
#geom_text(nudge_y = .01, alpha = 0.4)
} else { # Print blank default plot if 2 or fewer shared variables.
#print(1)
p = ggplot(data = default.data, aes(x=FABIA.loadings, y = MOFA.weights, color = dataset)) +
theme(
axis.title = element_blank()
)
}
p
}
plot.mf.grid = function(FABIA.results, MOFA.results, total_bcf,
f_end = total_bcf, bc_end = total_bcf, f_start = 1, bc_start = 1, plot.spearman = T) {
plot.list = vector('list', (f_end - f_start+1) * (bc_end - bc_start + 1))
#spearman.list = vector('character', (f_end - f_start+1) * (bc_end - bc_start + 1))
for(bc in bc_start:bc_end) {
for(f in f_start:f_end) {
curr.mf = merge.FABIA.MOFA.results(FABIA.results, MOFA.results, total_bcf, bc = bc, f = f, complete = T)
if(plot.spearman) {
plot.i = plot.mf(curr.mf)
} else {
plot.i = plot.mf.nospear(curr.mf)
}
plot.list[[(bc_end - bc_start+1)*(f-f_start)+(bc-bc_start+1)]] = plot.i
#if (sum(complete.cases(curr.mf)) >= 3) {
# spearman.data = curr.mf[complete.cases(curr.mf), ]
# spearman.test = cor.test(x = spearman.data$FABIA.loadings,
# y = spearman.data$MOFA.weights,
# method="spearman")
# spearman.list[[(bc_end - bc_start+1)*(f-f_start)+(bc-bc_start+1)]] == paste("Rho:", spearman.test$estimate, "P-value:", spearman.test$estimate)
#}
}
}
#plot.list
f = ggarrange(plotlist = plot.list,
common.legend = T,
legend = "right",
#labels = spearman.list,
nrow = f_end-f_start+1, ncol = bc_end-bc_start+1)
#grid.arrange(
# arrangeGrob(grobs = plot.list, ncol = total_bcf, nrow = total_bcf,
# bottom=textGrob("FABIA Loadings (Absolute Value)", gp=gpar(fontface="bold", col="red", fontsize=15)),
# left=textGrob("MOFA Weights, (Absolute Value)", gp=gpar(fontface="bold", col="blue", fontsize=15), rot=90)),
#)
bcf.text = paste(total_bcf, "Biclusters/Factors")
top.annotate = text_grob(bcf.text, color = "black", face = "bold", size = 20)
if (any(bc_end != total_bcf, f_end != total_bcf, bc_start != 1, f_start != 1)) {
bcf.text = paste(bcf.text, "\n", "(Biclusters", bc_start, "to", bc_end, ",",
"Factors", f_start, "to", f_end,")")
top.annotate = text_grob(bcf.text, color = "black", face = "bold", size = 10)
}
annotate_figure(f,
top = top.annotate,
bottom = text_grob("FABIA Loadings (Absolute Value)", color = "blue", face = "bold", size = 15),
left = text_grob("MOFA Weights, (Absolute Value)", color = "forestgreen", face = "bold", size = 15, rot = 90))
#print(spearman.list)
}
# Plot only one pair of bicluster/factor with added labels support (for individual plots exploration)
plot.mf.one = function(FABIA.results, MOFA.results, total_bcf, bc, f) {
plot.data = merge.FABIA.MOFA.results(FABIA.results, MOFA.results, total_bcf, bc, f, complete = T)
plot.data$dataset = sapply(plot.data$var, retrieve.dataset.type)
if (nrow(plot.data) >= 3) { # Minimum of 3 entries required to plot
p = ggplot(plot.data, aes(x=FABIA.loadings, y = MOFA.weights, label = var)) +
geom_point(show.legend = F, aes(color = dataset)) +
stat_cor(method = "spearman", label.y.npc="bottom", label.x.npc = 0.8) +
scale_color_manual(values = data.colors) +
geom_text_repel(data=plot.data, alpha = 1, size = 3,
force = 300, force_pull = 1,
max.overlaps = 10)
} else { # Print blank default plot if 2 or fewer shared variables.
p = ggplot(data = default.data, aes(x=FABIA.loadings, y = MOFA.weights, color = dataset))
}
p = p + theme(
axis.title = element_blank()
)
oneplot.text = paste("Bicluster", bc, "&", "Factor", f, "\n(Of", total_bcf, "Biclusters/Factors)")
top.annotate = text_grob(oneplot.text, color = "black", face = "bold", size = 10)
annotate_figure(p,
top = top.annotate,
bottom = text_grob("FABIA Loadings", color = "blue", face = "bold", size = 15),
left = text_grob("MOFA Weights", color = "forestgreen", face = "bold", size = 15, rot = 90))
}
# Old code for manually computing Spearman correlation.
# Deprecated in favor of plotting it directly on the plot.
spearman.FABIA.MOFA = function(FABIA.results, MOFA.results, total_bcf) {
out.matrix = matrix(, nrow = total_bcf, ncol = total_bcf)
for(f in 1:total_bcf) {
for(bc in 1:total_bcf) {
curr.fm = merge.FABIA.MOFA.results(FABIA.results, MOFA.results, total_bcf, bc = bc, f = f)
spearman.data = curr.fm[complete.cases(curr.fm), ]
#print(spearman.data)
#out.matrix[bc,f] = cor.test(x = spearman.data$FABIA.absL, y = spearman.data$MOFA.absW, method="spearman")
test = cor.test(x = spearman.data$FABIA.loadings, y = spearman.data$MOFA.weights, method="spearman")
}
}
#out.matrix
test
}
check.FABIA.MOFA.overlap(FABIA.Bic, MOFA.Bic, 1, against.plot = T)
## [,1]
## [1,] 49
plot.mf.grid(FABIA.Bic, MOFA.Bic, 1)
check.FABIA.MOFA.overlap(FABIA.Bic, MOFA.Bic, 2, against.plot = T)
## [,1] [,2]
## [1,] 10 17
## [2,] 15 51
plot.mf.grid(FABIA.Bic, MOFA.Bic, 2)
plot.mf.grid(FABIA.Bic, MOFA.Bic, 2, plot.spearman = F)
check.FABIA.MOFA.overlap(FABIA.Bic, MOFA.Bic, 3, against.plot = T)
## [,1] [,2] [,3]
## [1,] 51 27 14
## [2,] 8 12 22
## [3,] 0 0 0
plot.mf.grid(FABIA.Bic, MOFA.Bic, 3)
plot.mf.grid(FABIA.Bic, MOFA.Bic, 3, plot.spearman = F)
check.FABIA.MOFA.overlap(FABIA.Bic, MOFA.Bic, 5, against.plot = T)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 4 5 0 0 0
## [2,] 72 55 16 15 16
## [3,] 37 20 10 16 20
## [4,] 0 0 0 0 0
## [5,] 0 0 0 0 0
#dev.new(width = 10, height = 10, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.Bic, MOFA.Bic, 5)
plot.mf.grid(FABIA.Bic, MOFA.Bic, 5, plot.spearman = F)
check.FABIA.MOFA.overlap(FABIA.Bic, MOFA.Bic, 5, against.plot = T)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 4 5 0 0 0
## [2,] 72 55 16 15 16
## [3,] 37 20 10 16 20
## [4,] 0 0 0 0 0
## [5,] 0 0 0 0 0
#dev.new(width = 20, height = 20, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.Bic, MOFA.Bic, 10)
plot.mf.grid(FABIA.Bic, MOFA.Bic, 10, plot.spearman = F)
## 1B. Individual Plots Worth Exploring:
plot.mf.one(FABIA.Bic, MOFA.Bic, 1, 1, 1)
plot.mf.one(FABIA.Bic, MOFA.Bic, 3, 2, 1)
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 3, 3, 1)
plot.mf.one(FABIA.Bic, MOFA.Bic, 5, 1, 2)
## Warning: ggrepel: 46 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 5, 2, 2)
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 5, 2, 3)
plot.mf.one(FABIA.Bic, MOFA.Bic, 5, 3, 2)
plot.mf.one(FABIA.Bic, MOFA.Bic, 5, 5, 2)
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 1, 2)
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 2, 2)
## Warning: ggrepel: 25 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 3, 2)
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 9, 2)
## Warning: ggrepel: 36 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 10, 2)
## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 7, 4)
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 9, 4)
plot.mf.one(FABIA.Bic, MOFA.Bic, 10, 10, 4)
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Export overlaping Biclusters with linear combination of bicluster and factor
fabia.mofa = list("b1f1.1" = merge.FABIA.MOFA.results(FABIA.Bic, MOFA.Bic, 1, 1, 1, complete = T),
"b1f2.10" = merge.FABIA.MOFA.results(FABIA.Bic, MOFA.Bic, 10, 1, 2, complete = T))
saveRDS(fabia.mofa, file = "data/sel_features/fabia_mofa_overlap_of_interest.rds")
Deprecated, kept for completeness:
plot.mf.grid(FABIA.top5, MOFA.top5, 1)
plot.mf.grid(FABIA.top5, MOFA.top5, 2)
plot.mf.grid(FABIA.top5, MOFA.top5, 3)
#dev.new(width = 10, height = 10, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.top5, MOFA.top5, 5)
plot.mf.grid(FABIA.top5, MOFA.top5, 10, 5, 5, 1, 1)
plot.mf.grid(FABIA.top5, MOFA.top5, 10, 5, 10, 1, 6)
plot.mf.grid(FABIA.top5, MOFA.top5, 10, 10, 5, 6, 1)
plot.mf.grid(FABIA.top5, MOFA.top5, 10, 10, 10, 6, 6)
#dev.new(width = 20, height = 20, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.top5, MOFA.top5, 10)
plot.mf.one(FABIA.top5, MOFA.top5, 1, 1, 1)
plot.mf.one(FABIA.top5, MOFA.top5, 5, 2, 2)
plot.mf.one(FABIA.top5, MOFA.top5, 10, 1, 2)
plot.mf.one(FABIA.top5, MOFA.top5, 10, 2, 2)
plot.mf.grid(FABIA.top10, MOFA.top10, 1)
plot.mf.grid(FABIA.top10, MOFA.top10, 2)
plot.mf.grid(FABIA.top10, MOFA.top10, 3)
#dev.new(width = 10, height = 10, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.top10, MOFA.top10, 5)
#dev.new(width = 20, height = 20, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.top10, MOFA.top10, 10)
plot.mf.grid(FABIA.top25, MOFA.top25, 1)
plot.mf.grid(FABIA.top25, MOFA.top25, 2)
plot.mf.grid(FABIA.top25, MOFA.top25, 3)
#dev.new(width = 10, height = 10, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.top25, MOFA.top25, 5)
#dev.new(width = 20, height = 20, unit = "in", noRStudioGD = T)
plot.mf.grid(FABIA.top25, MOFA.top25, 10)